homework 5

The link to my github repository: homework 5

Problem 1

a.

Show the code
setClass(
  "waldCI",
  slots = list(
    lb    = "numeric",
    ub    = "numeric",
    mean  = "numeric",
    sterr = "numeric",
    level = "numeric"
  ),
  validity = function(object) {
    if (!is.finite(object@level) || object@level <= 0 || object@level >= 1)
      return("level must be between 0 and 1.")

    if (!is.finite(object@lb) || !is.finite(object@ub))
      return("the bound is infinite.")

    if (!is.na(object@lb) && !is.na(object@ub)) {
      if (object@lb >= object@ub)
        return("lb must be < ub.")
    }  
    TRUE
  }
)

############## CONSTRUCTOR ##############

waldCI <- function(level, mean = NULL, sterr = NULL,
                               lb = NULL, ub = NULL) {
  
   z <- qnorm(1 - (1 - level)/2)
   if (!is.null(mean) && !is.null(sterr)){
     lb <- mean - z * sterr
     ub <- mean + z * sterr
   } else if (!is.null(lb) && !is.null(ub)) {
    mean <- (lb + ub) / 2
    sterr <- (ub - lb) / (2 * z)
  } else {
    stop("You must provide either (mean, sterr) or (lb, ub).")
  }
  obj <- new("waldCI",
             lb = lb, ub = ub,
             mean = mean, sterr = sterr,
             level = level)
  
  validObject(obj)
  obj
}

############## SHOW METHOD ##############

setMethod("show", "waldCI",
          function(object) {
            cat("Wald Confidence Interval\n")
            cat(sprintf("Level: %.3f\n", object@level))
            cat(sprintf("Mean:  %.4f\n", object@mean))
            cat(sprintf("SE:    %.4f\n", object@sterr))
            cat(sprintf("CI:   [%.4f, %.4f]\n", object@lb, object@ub))
          })

############## ACCESSORS ##############

setGeneric("lb", function(x) standardGeneric("lb"))
[1] "lb"
Show the code
setGeneric("ub", function(x) standardGeneric("ub"))
[1] "ub"
Show the code
setGeneric("mean", function(x) standardGeneric("mean"))
[1] "mean"
Show the code
setGeneric("sterr", function(x) standardGeneric("sterr"))
[1] "sterr"
Show the code
setGeneric("level", function(x) standardGeneric("level"))
[1] "level"
Show the code
setMethod("lb", "waldCI", function(x) x@lb)
setMethod("ub", "waldCI", function(x) x@ub)
setMethod("mean", "waldCI", function(x) x@mean)
setMethod("sterr", "waldCI", function(x) x@sterr)
setMethod("level", "waldCI", function(x) x@level)

############## SETTERS (WITH VALIDATION) ##############

setGeneric("lb<-", function(x, value) standardGeneric("lb<-"))
[1] "lb<-"
Show the code
setGeneric("ub<-", function(x, value) standardGeneric("ub<-"))
[1] "ub<-"
Show the code
setGeneric("mean<-", function(x, value) standardGeneric("mean<-"))
[1] "mean<-"
Show the code
setGeneric("sterr<-", function(x, value) standardGeneric("sterr<-"))
[1] "sterr<-"
Show the code
setGeneric("level<-", function(x, value) standardGeneric("level<-"))
[1] "level<-"
Show the code
setMethod("lb<-", "waldCI",
                 function(x, value) {
                   x@lb <- value; validObject(x); x
                 })

setMethod("ub<-", "waldCI",
                 function(x, value) {
                   x@ub <- value; validObject(x); x
                 })

setMethod("mean<-", "waldCI",
                 function(x, value) {
                   x@mean <- value; validObject(x); x
                 })

setMethod("sterr<-", "waldCI",
                 function(x, value) {
                   x@sterr <- value; validObject(x); x
                 })

setMethod("level<-", "waldCI",
                 function(x, value) {
                   x@level <- value; validObject(x); x
                 })

############## CONTAINS FUNCTION ##############

setGeneric("contains", function(object, value) standardGeneric("contains"))
[1] "contains"
Show the code
setMethod("contains", c("waldCI","numeric"),
                 function(object, value) {
                   value >= object@lb & value <= object@ub
                 })

############## OVERLAP FUNCTION ##############

setGeneric("overlap", function(object1, object2) standardGeneric("overlap"))
[1] "overlap"
Show the code
setMethod("overlap", c("waldCI","waldCI"),
                 function(object1, object2) {
                   !(object1@ub < object2@lb || object1@lb > object2@ub)
                 })

############## AS.NUMERIC ##############

setMethod("as.numeric", "waldCI",
          function(x) c(x@lb, x@ub))

############## TRANSFORM CI ##############

setGeneric("transformCI", function(ci, f) standardGeneric("transformCI"))
[1] "transformCI"
Show the code
setMethod("transformCI", c("waldCI","function"),
          function(ci, f) {
            warning("Only monotonic transformations preserve CI meaning.")
            lb2 <- f(ci@lb)
            ub2 <- f(ci@ub)
            waldCI(level = ci@level,
                   lb = min(lb2, ub2),
                   ub = max(lb2, ub2))
          })

b.

Show the code
ci1 <- waldCI(level = 0.95, lb = 17.2, ub = 24.7)
ci2 <- waldCI(level = 0.99, mean = 13, sterr = 2.5)
ci3 <- waldCI(level = 0.75, lb = 27.43, ub = 39.22)
Show the code
ci1
Wald Confidence Interval
Level: 0.950
Mean:  20.9500
SE:    1.9133
CI:   [17.2000, 24.7000]
Show the code
ci2
Wald Confidence Interval
Level: 0.990
Mean:  13.0000
SE:    2.5000
CI:   [6.5604, 19.4396]
Show the code
ci3
Wald Confidence Interval
Level: 0.750
Mean:  33.3250
SE:    5.1245
CI:   [27.4300, 39.2200]
Show the code
as.numeric(ci1)
[1] 17.2 24.7
Show the code
as.numeric(ci2)
[1]  6.560427 19.439573
Show the code
as.numeric(ci3)
[1] 27.43 39.22
Show the code
lb(ci2)
[1] 6.560427
Show the code
ub(ci2)
[1] 19.43957
Show the code
mean(ci1)
[1] 20.95
Show the code
sterr(ci3)
[1] 5.12453
Show the code
level(ci2)
[1] 0.99
Show the code
lb(ci2) <- 10.5
mean(ci3) <- 34
level(ci3) <- .8
contains(ci1, 17)
[1] FALSE
Show the code
contains(ci3, 44)
[1] FALSE
Show the code
overlap(ci1, ci2)
[1] TRUE
Show the code
eci1 <- transformCI(ci1, sqrt)
Warning in transformCI(ci1, sqrt): Only monotonic transformations preserve CI
meaning.
Show the code
eci1
Wald Confidence Interval
Level: 0.950
Mean:  4.5586
SE:    0.2099
CI:   [4.1473, 4.9699]
Show the code
mean(transformCI(ci2, log))
Warning in transformCI(ci2, log): Only monotonic transformations preserve CI
meaning.
[1] 2.659343

c.

Show the code
#negative standard error
waldCI(level = 0.95, mean = 1, sterr = -1)
Error in validObject(.Object): invalid class "waldCI" object: lb must be < ub.
Show the code
#lb > ub
waldCI(level = 0.95, lb = 2, ub = 1)
Error in validObject(.Object): invalid class "waldCI" object: lb must be < ub.
Show the code
#Infinite bounds
waldCI(level = 0.95, lb = -Inf, ub = Inf)
Error in validObject(.Object): invalid class "waldCI" object: the bound is infinite.
Show the code
#invalid use of the setters
ci <- waldCI(level = 0.95, lb = 1, ub = 2)
lb(ci) <- 3
Error in validObject(x): invalid class "waldCI" object: lb must be < ub.

Problem 3

Show the code
data <- read.csv("https://raw.githubusercontent.com/nytimes/covid-19-data/refs/heads/master/rolling-averages/us-states.csv")
head(data)
        date  geoid      state cases cases_avg cases_avg_per_100k deaths
1 2020-01-21 USA-53 Washington     1      0.14                  0      0
2 2020-01-22 USA-53 Washington     0      0.14                  0      0
3 2020-01-23 USA-53 Washington     0      0.14                  0      0
4 2020-01-24 USA-53 Washington     0      0.14                  0      0
5 2020-01-24 USA-17   Illinois     1      0.14                  0      0
6 2020-01-25 USA-53 Washington     0      0.14                  0      0
  deaths_avg deaths_avg_per_100k
1          0                   0
2          0                   0
3          0                   0
4          0                   0
5          0                   0
6          0                   0
Show the code
colnames(data)
[1] "date"                "geoid"               "state"              
[4] "cases"               "cases_avg"           "cases_avg_per_100k" 
[7] "deaths"              "deaths_avg"          "deaths_avg_per_100k"

###a. How many major and minor spikes in cases were there?

Show the code
#chatgpt taught me how to use ymd() to change date into date type so that it can be further manipulated.
library(plotly)
Loading required package: ggplot2

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
Show the code
library(dplyr)

Attaching package: 'dplyr'
The following object is masked _by_ '.GlobalEnv':

    contains
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Show the code
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
Show the code
covid <- data %>% 
  group_by(date) %>%
  summarise(usa_cases = mean(cases_avg_per_100k))

covid$date <- ymd(covid$date)

fig <- covid %>%
  plot_ly(
    x = ~date,
    y = ~usa_cases,
    type = 'scatter',
    mode = 'lines+markers',
    line = list(width = 2)
  ) %>%
  layout(
    xaxis = list(
      title = "Date",
      tickformat = "%b %Y",  
      dtick = "M3",            
      tickangle = 45          
    ),
    yaxis = list(title = "Cases per 100k"),
    template = "plotly_white"
  )

fig

b.

For the states with the highest and lowest overall rates per population, what differences do you see in their trajectories over time?

Show the code
states <- data %>% 
  group_by(state) %>%
  summarise(overall_rate = mean(cases_avg,na.rm = TRUE))
high_state <- states %>% slice_max(overall_rate, n = 1) %>% pull(state)
low_state  <- states %>% slice_min(overall_rate, n = 1) %>% pull(state)

prob_b <- data %>% 
  filter(state %in% c(high_state,low_state))

prob_b$date <- ymd(prob_b$date)
fig <- prob_b %>%
  plot_ly(
    x = ~date,
    y = ~cases_avg,
    color = ~state,
    type = "scatter",
    mode = "lines",
    line = list(width = 2)
  ) %>%
  layout(
    xaxis = list(
      title = "Date",
      tickformat = "%b %Y",  
      dtick = "M3",          
      tickangle = 45
    ),
    yaxis = list(title = "Cases per day"),
    template = "plotly_white"
  )

fig
Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

c. 

Identify, to the best of your ability without a formal test, the first five states to experience Covid in a substantial way.

Show the code
first_wave <- data %>%
  filter(cases_avg_per_100k > 1) %>%
  group_by(state) %>%
  summarize(first_date = min(date)) %>%
  arrange(first_date) %>%
  slice_head(n = 5)

first_states <- first_wave$state

cat("The first five states to ecperience Covid in a substantial way are", first_states, sep = ", ")
The first five states to ecperience Covid in a substantial way are, Washington, New York, Guam, Louisiana, New Jersey
Show the code
plot_data <- data %>%
  filter(state %in% first_states) %>%
  mutate(date = ymd(date))

fig <- plot_data %>%
  plot_ly(
    x = ~date,
    y = ~cases_avg_per_100k,
    color = ~state,
    type = "scatter",
    mode = "lines",
    line = list(width = 1)
  ) %>%
  layout(
    xaxis = list(
      title = "Date",
      tickformat = "%b %Y",   # Month Year
      dtick = "M3",           # every 3 months
      tickangle = 45
    ),
    yaxis = list(title = "Cases per 100k"),
    template = "plotly_white"
  )

fig

The first five states to ecperience Covid in a substantial way are: Washington, New York, Guam, Louisiana, New Jersey.